Load Data

library(ggplot2)
library(data.table)
library(scales)
library(RColorBrewer)
library(reticulate)
library(uwot)
## Loading required package: Matrix
library(gridExtra)
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
## Loading required package: rpart
## 
## spatstat 1.64-1       (nickname: 'Help you I can, yes!') 
## For an introduction to spatstat, type 'beginner'
## 
## Note: R version 3.6.0 (2019-04-26) is more than a year old; we strongly recommend upgrading to the latest version
## 
## Attaching package: 'spatstat'
## The following object is masked from 'package:scales':
## 
##     rescale
## The following object is masked from 'package:data.table':
## 
##     shift
library(ggdendro)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
library(tidyverse)
## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat
## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::between()    masks data.table::between()
## x readr::col_factor() masks scales::col_factor()
## x dplyr::collapse()   masks nlme::collapse()
## x dplyr::combine()    masks gridExtra::combine()
## x purrr::discard()    masks scales::discard()
## x tidyr::expand()     masks Matrix::expand()
## x dplyr::filter()     masks stats::filter()
## x dplyr::first()      masks data.table::first()
## x dplyr::lag()        masks stats::lag()
## x dplyr::last()       masks data.table::last()
## x tidyr::pack()       masks Matrix::pack()
## x purrr::transpose()  masks data.table::transpose()
## x tidyr::unpack()     masks Matrix::unpack()
library(grid)
library(gridExtra) # add to AWS
library(cowplot) # add to AWS
library(slingshot) # add to AWS
## Loading required package: princurve
library(viridis) # add to AWS
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
library(RColorBrewer)
library(ggrepel)

use_condaenv(condaenv = 'r-reticulate', required = TRUE)

data_dir <- here::here('..','data')
load(file.path(data_dir, 'diff.sampled.Rdata'))
diff_dt <- fread(file.path(data_dir, 'diff.sampled.csv.gz'))

censor_negative_min = -1500
redo_umap_clustering = F
redo_umap_param_search = F

#only redo param search if doing clustering also:
redo_umap_clustering = redo_umap_clustering && redo_umap_clustering

# map day 0 to both plus and minus
map_day_0 <- function(df) {
  return(rbind(
    df[k562!='none'], #gets all cd19+-
    #df[rep(keep_none, .N) & k562=='none' & day > 0], 
    df[k562=='none' & day == 0][, k562 := 'cd19+'], #none and day 0, assign as 19+
    df[k562=='none' & day == 0][, k562 := 'cd19-'] #none and day 0, assign as 19-
  ))
}

'%notin%' <- Negate('%in%')

Filter Data

#filter diff_dt by cd4/8 type, cd19+ (map day 0), donor == 1 or 2

#cd4, cd19+, donor 
diff_dt_cd4 <- diff_dt %>% map_day_0() %>% filter(t_type == 'cd4', k562=='cd19+', donor == 2)

#cd8, cd19+, donor
diff_dt_cd8 <- diff_dt %>% map_day_0() %>% filter(t_type == 'cd8', k562=='cd19+', donor == 2)

UMAP

For now, skip straight to high dimensional plotting.

UMAP Functions

#grab channel map from cd4 data (cd4/8 are the same)
channel_map <- diff_opt_cd4$channel_map

# for now use all variables in the channel map except cd4/8
umap_vars <- c(paste0(names(channel_map),'_t'))

# removing zombie, cd_t, myc_t, gfp_t
umap_vars <- umap_vars[!umap_vars %in% c('cd_t', 'zombie_t', 'myc_t', 'gfp_t')]

run_umap <- function(dt, chosen_dist, chosen_n_neighbor, sample_n, umap_vars,
    chosen_learning_rate=0.1) {
  
  umap_dt <- dt[!is.na(event_id), 
    .SD[event_id %in% sample(unique(event_id), 
      min(sample_n, length(unique(event_id))))],
    by=c('well', 'plate', 'day')]
  
  # for now use all variables in the channel map
  #umap_vars <- c(paste0(names(channel_map),'_t'))
  
  #cast it so variables are columns and
  #subset sample umap data on variables
  umap_cast_dt <- data.table(dcast(umap_dt, 
    event_id + well + plate + day + t_type ~ variable, 
    value.var='value'))
  
  umap_dt_in <- umap_cast_dt[, ..umap_vars]
  
  # scale each input column
  umap_dt_in[ , 
    (names(umap_dt_in)) := lapply(.SD, scale), .SDcols = names(umap_dt_in)]
  
  # run umap via uwot library
  umap_out <- umap(umap_dt_in, min_dist=chosen_dist,
      learning_rate=chosen_learning_rate,
      n_sgd_threads=1,
      n_neighbors=chosen_n_neighbor, 
      verbose=T, n_threads=8, n_trees=50,
      ret_nn=T)
  
  # nearest neighbors in original space
  nearest_neighbors <- umap_out$nn$euclidean$idx
  neighbor_dist <- umap_out$nn$euclidean$dist
  edge_weights <- scales::rescale(-neighbor_dist, to=c(0,1))
  edge_weights <- edge_weights - min(edge_weights)
  umap_out <- data.table(umap_out$embedding)
  
  #nearest neighbors in embedding
  # umap_nn <- cbind(1:nrow(umap_fcs_dt), 
  #     nnwhich(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
  # umap_nd <- cbind(rep(0, nrow(umap_fcs_dt)), 
  #     nndist(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
  # umap_ew <- scales::rescale(-umap_nd, to=c(0,1))
  # umap_ew <- umap_ew - min(umap_ew)
      
  # rename the columns
  names(umap_out) <- c('umap_1','umap_2')
  
  # add the umap output to the input dt
  umap_fcs_dt <- cbind(umap_cast_dt[, 1:length(names(umap_cast_dt))], umap_out)
  umap_fcs_dt[, id := 1:.N]
  
  # add back the annotations
  umap_fcs_dt <- unique(umap_dt[, 
    .(donor, car, k562, t_type, day, well, event_id)])[
      umap_fcs_dt, on=.(t_type,well,day,event_id)]
  
  source_python(here::here('py/leiden_clust.py'))
  clust_membership <- leiden_clust(nearest_neighbors,
    edge_weights=scales::rescale(-neighbor_dist, to=c(0,1)))
  umap_fcs_dt[, cluster := factor(clust_membership+1)]
  
  #renumber the clusters
  cluster_ranks <- umap_fcs_dt[, list(mean_day=mean(day)), by=cluster][, rank_day := rank(mean_day)]
  
  umap_fcs_dt <- umap_fcs_dt[cluster_ranks, on='cluster'][, c('cluster', 'rank_day') := list(rank_day, NULL)]
  
  return(umap_fcs_dt)
}

Run UMAP

Scaling this to 200 events per well and checking CAR/condition separation.

umap_cd4_fcs_dt <- run_umap(diff_dt_cd4, 0.005, 15, 200, umap_vars)
umap_cd8_fcs_dt <- run_umap(diff_dt_cd8, 0.005, 15, 200, umap_vars)

fwrite(umap_cd4_fcs_dt, 
  compress='gzip', 
  file=file.path(here::here('..','data','diff.sampled.umap_cd4.csv.gz')))

fwrite(umap_cd8_fcs_dt, 
  compress='gzip', 
  file=file.path(here::here('..','data','diff.sampled.umap_cd8.csv.gz')))

# sync output back to s3
system('aws s3 sync \\
  --exclude "*" \\
  --include "*.Rdata" \\
  --include "*.csv*" \\
  ~/local-tcsl/data s3://roybal-tcsl//lenti_screen_compiled_data/data')

Cluster Analysis

UMAP Plots

#read in cd8 umap data
umap_cd8_fcs_dt <- fread(
  file.path(
    here::here('..','data','diff.sampled.umap_cd8.csv.gz')))
umap_cd8_fcs_dt[, cluster := factor(cluster)]

umap_cd8_cluster_dt <- umap_cd8_fcs_dt[, list(
  mean_umap_1= mean(umap_1), 
  mean_umap_2= mean(umap_2), 
  size= .N), by=cluster]

#read in cd4 umap data
umap_cd4_fcs_dt <- fread(
  file.path(
    here::here('..','data','diff.sampled.umap_cd4.csv.gz')))
umap_cd4_fcs_dt[, cluster := factor(cluster)]

umap_cd4_cluster_dt <- umap_cd4_fcs_dt[, list(
  mean_umap_1= mean(umap_1), 
  mean_umap_2= mean(umap_2), 
  size= .N), by=cluster]

#plot umap
ggumap <- function(df, df_cluster) {
  # whole plot
  umap_whole <- ggplot() + 
  geom_point(data=df, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=df_cluster, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  theme_minimal() + ggtitle('UMAP by Cluster')

  # individual plots
  umap_individual <- ggplot() + 
  geom_point(data=df, 
      aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
    geom_label(data=df_cluster, aes(
      x=mean_umap_1, y=mean_umap_2,
      label=paste(cluster,size,sep='\n'), 
      color=cluster), alpha=0.3) +
    facet_wrap(~cluster) +
    theme_bw()

  # UMAP by day
  label_days_umap <- df %>% group_by(day) %>% select(umap_1, umap_2) %>% summarize_all(mean)
  umap_by_day <- ggplot(df, aes(x=umap_1, y=umap_2, color=as.factor(day)))+geom_point(size=0.1,alpha = 0.5, position=position_jitter(h=0.15,w=0.15))+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank())+geom_label_repel(aes(label=day), data=label_days_umap)+guides(colour=FALSE)+ggtitle('UMAP by Day') + scale_color_viridis(discrete=TRUE)+theme_minimal()

    grid.arrange(umap_by_day, umap_whole, umap_individual, nrow=3)
}

#CD8
ggumap(umap_cd8_fcs_dt, umap_cd8_cluster_dt)
## Adding missing grouping variables: `day`

#CD4
ggumap(umap_cd4_fcs_dt, umap_cd4_cluster_dt)
## Adding missing grouping variables: `day`

UMAP Color Plots

color_plots <- function(umap_df) {
  cd4_colors = brewer.pal('Greens', n=9)[c(2,4,6,8,9)]
  cd8_colors = brewer.pal('Oranges', n=9)[c(2,4,6,8,9)]
  color_time <- ggplot(umap_df[][, cd19 := (k562=='cd19+')], 
      aes(x=umap_1, y=umap_2, color=interaction(day, t_type))) +
    geom_point(size=0.1, alpha=0.5) +
    facet_grid(car~cd19) +
    scale_color_manual(values=c(cd4_colors, cd8_colors)) +
    guides(colour = guide_legend(override.aes = list(alpha=1, size=3)))
  
  color_density <- ggplot(umap_df, aes(x=umap_1, y=umap_2)) +
    geom_hex(bins = 70) +
    facet_grid(car~cd19) +
    scale_fill_continuous(
      type = "viridis", limits=c(0,30), oob=scales::squish) +
    theme_bw()
  
  color_markers <- ggplot(
      melt(umap_df, measure.vars=umap_vars)[, 
        value.scaled := scale(value), by='variable'][,
          cd19 := (k562=='cd19+')], 
      aes(x=umap_1, y=umap_2, z=value.scaled)) +
    stat_summary_hex(bins = 70) +
    facet_grid(variable~cd19) +
    scale_fill_distiller(palette='RdYlBu', limits=c(-3, 3), oob=scales::squish) +
    theme_bw()
  
  grid.arrange(color_time, color_density, color_markers, ncol=3)
}

#cd8
color_plots(umap_cd8_fcs_dt)

#cd4
color_plots(umap_cd4_fcs_dt)

## Temporal UMAP Visualizations

Biomarker Diffusion Map Function

(Function called in Cluster Plot)

viz.umap <- function(dat,dr,param.name,limits=NULL){
  ColVal <- pull(dat %>% select(param.name))
  if(is.null(limits)){
    Lim <- quantile(ColVal,probs=seq(0,1,0.01))[c(2,100)]
    p <- ggplot(dat, aes(x = umap_1, y =umap_2)) +geom_point(aes(color = ColVal), size=0.1)+theme_classic()+scale_color_distiller(name=param.name, palette = "RdYlBu", limits=Lim, oob=squish)+theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank())+ggtitle(param.name)
  } else {
    p <- ggplot(dat, aes(x = umap_1, y = umap_2)) +geom_point(aes(color = ColVal), size=0.1)+theme_classic()+scale_color_distiller(name=param.name, palette = "RdYlBu", limits=c(limits[1],limits[2]), oob=squish)+theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank())+ggtitle(param.name)
  }
  p
}

visualize_params_umap <- function(df) {
  parameters <- colnames(df)[c(9, 11:15)] # colnames is how to select the right biomarkers
  p <- list()
  for (i in 1:length(parameters)) {
  p[[i]] <- viz.umap(dat=df, param.name=parameters[i])
  }
  do.call(plot_grid,p)
}

Contour Plots

umap_contour_plots <- function(umap_df){
  #car x day
  car_by_day <- umap_df %>% ggplot(., aes(x=umap_1, y=umap_2))+geom_point(size=0.1, color= "#DCDCDC") + geom_density2d(aes(colour = stat(nlevel)), contour_var = "ndensity") + facet_grid(car ~ day) + theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(), legend.title = element_blank()) + ggtitle("CAR Densities by Day")

    # clusters by car
  clusters_by_car <- umap_df %>% ggplot(., aes(x=umap_1, y=umap_2))+geom_point(size=0.1, color= "#DCDCDC") +   geom_density2d(aes(colour = stat(nlevel)), contour_var = "ndensity") + facet_grid(cluster ~ car) + theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(), legend.title = element_blank())+ ggtitle("CAR Densities by Cluster")
  
  # clusters x day
  clusters_by_day <- umap_df %>% ggplot(., aes(x=umap_1, y=umap_2))+geom_point(size=0.1, color= "#DCDCDC") + geom_density2d(aes(colour = stat(nlevel)), contour_var = "ndensity") + facet_grid(cluster ~ day) + theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(), legend.title = element_blank()) + ggtitle("Cluster Densities by Day")
  
  plot_grid(car_by_day,clusters_by_car,clusters_by_day, nrow=3)
  
}

#CD8
umap_contour_plots(umap_cd8_fcs_dt)
## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

#CD4
umap_contour_plots(umap_cd4_fcs_dt)
## Warning: Computation failed in `stat_density2d()`:
## missing value where TRUE/FALSE needed

Grouped Cluster Plots

all_plots <- function(umap_fcs_dt, umap_cluster_dt, cell_type, dendro=T) {
  
  #UMAP
  umap_plot <- ggplot() + 
  geom_point(data=umap_fcs_dt, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=umap_cluster_dt, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  theme_minimal()
  
  #UMAP biomarker Heatmap
  umap_heatmap <- visualize_params_umap(umap_fcs_dt)
  
  #Dendrogram / cluster heatmap
  cluster_pct_vars <- c(paste0(
    names(channel_map),'_t'), 
    'FSC-A','SSC-A','donor','cd19')
  
    max_cluster <- max(as.numeric(umap_fcs_dt$cluster), na.rm=T)
  
  umap_fcs_dt[, cluster:= factor(cluster, levels = as.character(1:max_cluster))]
  
  
  umap_var_melt_dt <-melt(
    umap_fcs_dt[!is.na(donor)], measure.vars= c(umap_vars)) #add back in previously removed vars
  
  # Use z scale to plot params
  umap_var_melt_dt[, value_scaled := scale(value), by=variable]
  param_cluster_pct_dt <- umap_var_melt_dt[
    !is.na(cluster) & variable %in% cluster_pct_vars,
    list(
      clust_mean = mean(value_scaled),
      clust_sd = sd(value_scaled)),
        by=c('variable','cluster')]
  
  if (dendro) {
    # Make dendrogram
    cluster_mean_cast <- dcast(
      param_cluster_pct_dt, 
      variable ~ cluster, 
      value.var='clust_mean')
    
    cluster_dendro_m <- as.matrix(cluster_mean_cast[, -c(1)])
    rownames(cluster_dendro_m) <- unlist(cluster_mean_cast[,1])
    dendro_clusters <- as.dendrogram(hclust(d = dist(x = t(cluster_dendro_m))))
    dendro_vars <- as.dendrogram(hclust(d = dist(x = cluster_dendro_m)))
    
    # Create dendrogram plot
    dendro_vars_plot <- ggdendrogram(data = dendro_vars, rotate = TRUE) + 
      theme(axis.text.y = element_text(size = 6))
    dendro_clusters_plot <- ggdendrogram(data = dendro_clusters, rotate = TRUE) + 
      theme(axis.text.y = element_text(size = 6))
    
    # column order
    cluster_order <- order.dendrogram(dendro_clusters)
    cluster_names <- colnames(cluster_dendro_m[, cluster_order])
    param_cluster_pct_dt[, cluster:= factor(cluster, levels = cluster_names)]
    
    # row order
    var_order <- order.dendrogram(dendro_vars)
    var_names <- row.names(cluster_dendro_m[var_order, ])
    param_cluster_pct_dt$variable <- factor(
        param_cluster_pct_dt$variable,
        levels = var_names)
  } else {
    param_cluster_pct_dt[, cluster:= factor(cluster, levels = as.character(1:max_cluster))]
  }
  
   #annotate clusters as per variable values (removed gfp, myc, cd, donor)
  '%notin%' <- Negate('%in%')
  annotations <- param_cluster_pct_dt %>%
  mutate(variable=gsub("\\_t$", "", variable)) %>%
  filter(variable %notin% c('gfp','myc','cd','donor', 'zombie')) %>%
  mutate(cluster=as.character(cluster)) %>%
  group_by(cluster) %>% 
  mutate(rk = rank(-abs(clust_mean))) %>%
  filter(rk <= 3) %>% 
  arrange(cluster, rk) %>%
  mutate(variable = ifelse(clust_mean > 0, paste(variable,'+',sep = ""), paste(variable,'-',sep = ""))) %>%
  summarise(variable = paste(variable, collapse=", ")) %>% 
  data.table::transpose()
  colnames(annotations) <- as.character(unlist(annotations[1,]))
  annotations = annotations[-1, ]
  if (dendro) {
    annotations <- annotations[cluster_names]
  } else {
    annotations <- annotations[as.character(1:max_cluster)]
  }
  row.names(annotations) <- NULL 
  
  # heatmap
  var_heatmap <- ggplot(param_cluster_pct_dt) + 
      geom_tile(aes(x=cluster, y=variable, fill=clust_mean), color='black') + 
      geom_text(aes(x=cluster, y=variable, label=round(clust_mean, 2)), size=3, color='black') +
      scale_fill_distiller(palette='PiYG', limits=c(-3,3), oob=scales::squish) +
      theme_minimal() # + theme(axis.text.x = element_text(angle=90, hjust=1))
  # col dendro
  if (dendro) {
    dendro_data_col <- dendro_data(dendro_clusters, type = "rectangle")
    dendro_col <- axis_canvas(var_heatmap, axis = "x") + 
        geom_segment(data = segment(dendro_data_col), 
            aes(x = x, y = y, xend = xend, yend = yend))
    plot_dendroheat <- insert_xaxis_grob(var_heatmap, 
                dendro_col, grid::unit(0.2, "null"), position = "top")
    dendro_plot <- ggdraw(plot_dendroheat)
  }
  
  # annotation table
  tt <- ttheme_default(core=list(fg_params = list(fontsize=10), colhead=list(fg_params = list(fontsize=10, parse=TRUE))))
  annotations[1,] = str_wrap(annotations[1,], 0)
  tbl <- tableGrob(annotations, rows=NULL, theme=tt)
  # tbl <- strwrap(tbl, width = 10, simplify = FALSE)
  tbl$widths <- unit(rep(1/ncol(tbl), ncol(tbl)), "npc")
  
  # CLUSTERS BY DAY
  day_cluster_total_pct_dt <- map_day_0(umap_fcs_dt)[,
    data.table(table(day,cluster, t_type, k562))][,
      list(cluster, pct=N/sum(N)), by=c('k562','day','t_type')]
  
  if (dendro) {
    day_cluster_total_pct_dt[, cluster := factor(cluster, levels = cluster_names)]
  } else {
    day_cluster_total_pct_dt[, cluster := factor(cluster, levels = as.character(1:max_cluster))]
  }
  
  clusters_by_day <- ggplot(day_cluster_total_pct_dt[k562=='cd19+'], aes(
      x=cluster, 
      y=factor(day, levels=c(0,6,15,24,33)), 
      fill=pct)) + 
    geom_tile(color='black') +
    geom_text(aes(label=round(pct*100)), size=3, color='white') +
    facet_grid(k562+t_type~., scales='free') +
    scale_fill_viridis_c('',direction=1, limits=c(0, 0.6)) +
    labs(title='Percent of cells in each day by cluster', y='Day',
      subtitle='(rows sum to 100%)') +
    theme_minimal() + ggtitle(paste('Percent of ',as.character(cell_type),' cells in each day by cluster'))
  # CARs BY CLUSTER/DAY
  car_cluster_day_indiv_k562_total_pct_dt <- map_day_0(umap_fcs_dt)[,
    data.table(table(car,cluster,k562,t_type,day))][,
      list(cluster, pct=N/sum(N)), by=c('car','k562','t_type','day')][,
        pct_delta := scale(pct), by=c('car','k562','t_type','day')]
  
  if (dendro) {
    car_cluster_day_indiv_k562_total_pct_dt[, cluster := factor(cluster, levels = cluster_names)] 
  } else {
    car_cluster_day_indiv_k562_total_pct_dt[, cluster := factor(cluster, levels = as.character(1:max_cluster))]
  }
  
  car_cluster_day_indiv_k562_total_pct_dt$day <- factor(car_cluster_day_indiv_k562_total_pct_dt$day, levels=c('0','6','15','24','33'))
  
  min_col <- brewer.pal(name='BrBG', n=11)[2]
  
  cars_by_cluster <- ggplot(car_cluster_day_indiv_k562_total_pct_dt[k562=='cd19+']) + 
      geom_tile(aes(x=cluster, y=car, fill=pct_delta), color='black') +
      facet_grid(day~k562, scales='free') +
      scale_fill_distiller('',
        palette='BrBG', 
        direction=1,
        #limits=c(-2.8, 2.8), 
        na.value=min_col) +
      labs(title='CAR enrichment\nper day per cluster', y='CAR',
        subtitle='CAR enrichment in cluster\n(z-score) (summed across rows)') +
      theme_minimal() + ggtitle(paste(as.character(cell_type),' CAR enrichment\nper day per cluster')) 
  
  # print all plots and table
  plot_grid(umap_plot, umap_heatmap, var_heatmap, clusters_by_day, cars_by_cluster, tbl, labels = c('A', 'B', 'C', 'D', 'E', 'F'), nrow=6, rel_heights = c(3/12, 3/12, 1/12, 1/12, 3/12, 1/12))
} 

#CD8
all_plots(umap_cd8_fcs_dt, umap_cd8_cluster_dt, 'CD8', dendro=F)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(param.name)` instead of `param.name` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## `summarise()` ungrouping output (override with `.groups` argument)

#CD4
all_plots(umap_cd4_fcs_dt,umap_cd4_cluster_dt, 'CD4', dendro=F)
## `summarise()` ungrouping output (override with `.groups` argument)

Labeled Leiden Clusters Map

# manually determined 

#CD8
cd8_diff_by_cluster <- data.frame("cluster" = c(1:12), "cell_type" = c('1-TEM', '2-TSCM', '3-TCM', '4-TSCM', '5-TEM', '6-TTM', '7-TEM', '8-TTM', '9-TEM', '10-TEM', '11-TN', '12-TN'))
cd8_diff_by_cluster$cluster <- as.factor(cd8_diff_by_cluster$cluster)
umap_cd8_fcs_dt <- left_join(umap_cd8_fcs_dt, cd8_diff_by_cluster, by='cluster')

#CD4
cd4_diff_by_cluster <- data.frame("cluster" = c(1:14), "cell_type" = c('1-TEM', '2-TEM', '3-TEM', '4-TEM', '5-TCM', '6-TN', '7-TN', '8-TCM', '9-TSCM', '10-TEM', '11-TCM', '12-TCM', '13-TSCM','14-TEM'))
cd4_diff_by_cluster$cluster <- as.factor(cd4_diff_by_cluster$cluster)
umap_cd4_fcs_dt <- left_join(umap_cd4_fcs_dt, cd4_diff_by_cluster, by='cluster')

#Labeled Leiden Clusters Map
visualize_clusters_umap <- function(df) {
  label_clusters_umap <- df %>% group_by(cluster, cell_type) %>% select(umap_1, umap_2) %>% summarize_all(mean)
  ggplot(df, aes(x=umap_1, y=umap_2, color=as.factor(cluster)))+geom_point(size=0.1)+facet_grid(car ~ day) +theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank())+geom_label_repel(aes(label=cell_type, size=0.5), data=label_clusters_umap)+guides(colour=FALSE)+ggtitle('Leiden clusters')
}

#cd8
visualize_clusters_umap(umap_cd8_fcs_dt)

#cd4
visualize_clusters_umap(umap_cd4_fcs_dt)

Batch Effect (Run when using both donors for UMAP)

#cd8
umap_cd8_fcs_dt %>% group_by(cluster, car, day) %>% count(donor) %>% rename(donor_count = n) %>% group_by(cluster,car,day, donor) %>% summarise(Sum = sum(donor_count)) %>% spread(key = donor, value = Sum) %>% rename(donor1=4, donor2=5) %>% replace(is.na(.), 1) %>% mutate(ratio = donor1/donor2) %>% mutate(ratio = log(ratio)) %>% ggplot(., aes(x=cluster,y=factor(day), fill=ratio)) + geom_tile(colour="white",size=0.25) + scale_fill_gradientn(colors=c('red','white','blue'),values=c(0,.75,1))+ labs(x='Cluster', y='Day') + scale_y_discrete(expand=c(0,0)) + theme_grey(base_size=8) + theme_bw() + theme(legend.text=element_text(face="bold"),axis.ticks=element_line(size=0.4),plot.background=element_blank(),panel.border=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + facet_grid(.~car)
     

#cd4
umap_cd4_fcs_dt %>% group_by(cluster, car, day) %>% count(donor) %>% rename(donor_count = n) %>% group_by(cluster,car,day, donor) %>% summarise(Sum = sum(donor_count)) %>% spread(key = donor, value = Sum) %>% rename(donor1=4, donor2=5) %>% replace(is.na(.), 1) %>% mutate(ratio = donor1/donor2) %>% mutate(ratio = log(ratio)) %>% ggplot(., aes(x=cluster,y=factor(day), fill=ratio)) + geom_tile(colour="white",size=0.25) + scale_fill_gradientn(colors=c('red','white','blue'),values=c(0,.33,1))+ labs(x='Cluster', y='Day') + scale_y_discrete(expand=c(0,0)) + theme_grey(base_size=8) + theme_bw() + theme(legend.text=element_text(face="bold"),axis.ticks=element_line(size=0.4),plot.background=element_blank(),panel.border=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + facet_grid(.~car)

Correlation

library(Hmisc)
library(ggcorrplot) 

biomarker_corr <- function(df) {
  biomarkers <- colnames(df)[c(9, 11:15)]
  corr <- round(cor(biomarkers),2)
  p.mat <- cor_pmat(biomarkers)
  ggcorrplot(corr, method = "circle", type = "upper", outline.col = "white", p.mat = p.mat, ggtheme= ggplot2::theme_gray, colors = c("#6D9EC1", "white", "#E46726"))
}

biomarker_corr_by_day <- function(df) {
  plot_grid(df %>% filter(day==0) %>% biomarker_corr()
, df %>% filter(day==6) %>% biomarker_corr(), df %>% filter(day==15) %>% biomarker_corr(), df %>% filter(day==24) %>% biomarker_corr(), df %>% filter(day==33) %>% biomarker_corr(), labels = c('Day 0', 'Day 6', 'Day 15', 'Day 24', 'Day 33'), nrow=3)
}

biomarker_corr_by_car <- function(df) {
  plot_grid(df %>% filter(car=='41BB') %>% biomarker_corr()
, df %>% filter(car=='untrans') %>% biomarker_corr(), df %>% filter(car=='BAFF-R') %>% biomarker_corr(), df %>% filter(car=='CD28') %>% biomarker_corr(), df %>% filter(car=='CD40') %>% biomarker_corr(), df %>% filter(car=='KLRG1') %>% biomarker_corr(), df %>% filter(car=='TACI') %>% biomarker_corr(), df %>% filter(car=='TNR8') %>% biomarker_corr(), df %>% filter(car=='zeta') %>% biomarker_corr(), labels = c('41BB', 'untrans', 'BAFF-R', 'CD28', 'CD40', 'KLRG1', 'TACI', 'TNR8', 'zeta'), nrow=3)
}

#cd8
biomarker_corr_by_day(umap_cd8_fcs_dt)
biomarker_corr_by_car(umap_cd8_fcs_dt)

#cd4
biomarker_corr_by_day(umap_cd4_fcs_dt)
biomarker_corr_by_car(umap_cd4_fcs_dt)